{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 05 How parameters change as data is shifted and scaled"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%html\n",
""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from pandas import Series, DataFrame\n",
"import matplotlib.pyplot as plt\n",
"from scipy import stats"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[Khan Academy: How parameters change as data is shifted and scaled](https://www.khanacademy.org/math/ap-statistics/summarizing-quantitative-data-ap/linear-transformations-data/v/how-parameters-change-as-data-is-shifted-and-scaled)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"x = np.array([7, 7, 5, 8, 10, 13, 5, 3, 2, 3, 5, 6])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Original sample alongside a shifted (+5) and a scaled (*5) copy of it\n",
"df = DataFrame({'Data': x}).assign(**{'Data+5': x + 5, 'Data*5': x * 5})"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Data | \n",
" Data+5 | \n",
" Data*5 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 7 | \n",
" 12 | \n",
" 35 | \n",
"
\n",
" \n",
" 1 | \n",
" 7 | \n",
" 12 | \n",
" 35 | \n",
"
\n",
" \n",
" 2 | \n",
" 5 | \n",
" 10 | \n",
" 25 | \n",
"
\n",
" \n",
" 3 | \n",
" 8 | \n",
" 13 | \n",
" 40 | \n",
"
\n",
" \n",
" 4 | \n",
" 10 | \n",
" 15 | \n",
" 50 | \n",
"
\n",
" \n",
" 5 | \n",
" 13 | \n",
" 18 | \n",
" 65 | \n",
"
\n",
" \n",
" 6 | \n",
" 5 | \n",
" 10 | \n",
" 25 | \n",
"
\n",
" \n",
" 7 | \n",
" 3 | \n",
" 8 | \n",
" 15 | \n",
"
\n",
" \n",
" 8 | \n",
" 2 | \n",
" 7 | \n",
" 10 | \n",
"
\n",
" \n",
" 9 | \n",
" 3 | \n",
" 8 | \n",
" 15 | \n",
"
\n",
" \n",
" 10 | \n",
" 5 | \n",
" 10 | \n",
" 25 | \n",
"
\n",
" \n",
" 11 | \n",
" 6 | \n",
" 11 | \n",
" 30 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Data Data+5 Data*5\n",
"0 7 12 35\n",
"1 7 12 35\n",
"2 5 10 25\n",
"3 8 13 40\n",
"4 10 15 50\n",
"5 13 18 65\n",
"6 5 10 25\n",
"7 3 8 15\n",
"8 2 7 10\n",
"9 3 8 15\n",
"10 5 10 25\n",
"11 6 11 30"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Pick the 'mean' and 'std' rows by label instead of by their position in\n",
"# describe()'s output, so the selection does not depend on row order.\n",
"mean_std_df = df.describe().loc[['mean', 'std']]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Median and interquartile range of every column of df, built with a single\n",
"# comprehension instead of repeating the same expression once per column.\n",
"median_iqr_df = DataFrame(\n",
"    {col: [np.median(df[col]), stats.iqr(df[col])] for col in df.columns},\n",
"    index=['median', 'iqr'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Append the summary rows (mean, std, median, iqr) below the raw observations.\n",
"# Summary rows left over from a previous run of this cell are dropped first,\n",
"# so re-running the cell does not stack duplicate rows onto df.\n",
"summary_df = pd.concat([mean_std_df, median_iqr_df])\n",
"df = pd.concat([df.drop(index=summary_df.index, errors='ignore'), summary_df])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Data | \n",
" Data+5 | \n",
" Data*5 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 7.000000 | \n",
" 12.000000 | \n",
" 35.000000 | \n",
"
\n",
" \n",
" 1 | \n",
" 7.000000 | \n",
" 12.000000 | \n",
" 35.000000 | \n",
"
\n",
" \n",
" 2 | \n",
" 5.000000 | \n",
" 10.000000 | \n",
" 25.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 8.000000 | \n",
" 13.000000 | \n",
" 40.000000 | \n",
"
\n",
" \n",
" 4 | \n",
" 10.000000 | \n",
" 15.000000 | \n",
" 50.000000 | \n",
"
\n",
" \n",
" 5 | \n",
" 13.000000 | \n",
" 18.000000 | \n",
" 65.000000 | \n",
"
\n",
" \n",
" 6 | \n",
" 5.000000 | \n",
" 10.000000 | \n",
" 25.000000 | \n",
"
\n",
" \n",
" 7 | \n",
" 3.000000 | \n",
" 8.000000 | \n",
" 15.000000 | \n",
"
\n",
" \n",
" 8 | \n",
" 2.000000 | \n",
" 7.000000 | \n",
" 10.000000 | \n",
"
\n",
" \n",
" 9 | \n",
" 3.000000 | \n",
" 8.000000 | \n",
" 15.000000 | \n",
"
\n",
" \n",
" 10 | \n",
" 5.000000 | \n",
" 10.000000 | \n",
" 25.000000 | \n",
"
\n",
" \n",
" 11 | \n",
" 6.000000 | \n",
" 11.000000 | \n",
" 30.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 6.166667 | \n",
" 11.166667 | \n",
" 30.833333 | \n",
"
\n",
" \n",
" std | \n",
" 3.128559 | \n",
" 3.128559 | \n",
" 15.642793 | \n",
"
\n",
" \n",
" median | \n",
" 5.500000 | \n",
" 10.500000 | \n",
" 27.500000 | \n",
"
\n",
" \n",
" iqr | \n",
" 2.750000 | \n",
" 2.750000 | \n",
" 13.750000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Data Data+5 Data*5\n",
"0 7.000000 12.000000 35.000000\n",
"1 7.000000 12.000000 35.000000\n",
"2 5.000000 10.000000 25.000000\n",
"3 8.000000 13.000000 40.000000\n",
"4 10.000000 15.000000 50.000000\n",
"5 13.000000 18.000000 65.000000\n",
"6 5.000000 10.000000 25.000000\n",
"7 3.000000 8.000000 15.000000\n",
"8 2.000000 7.000000 10.000000\n",
"9 3.000000 8.000000 15.000000\n",
"10 5.000000 10.000000 25.000000\n",
"11 6.000000 11.000000 30.000000\n",
"mean 6.166667 11.166667 30.833333\n",
"std 3.128559 3.128559 15.642793\n",
"median 5.500000 10.500000 27.500000\n",
"iqr 2.750000 2.750000 13.750000"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Data | \n",
" Data+5 | \n",
" Data*5 | \n",
"
\n",
" \n",
" \n",
" \n",
" mean | \n",
" 6.166667 | \n",
" 11.166667 | \n",
" 30.833333 | \n",
"
\n",
" \n",
" std | \n",
" 3.128559 | \n",
" 3.128559 | \n",
" 15.642793 | \n",
"
\n",
" \n",
" median | \n",
" 5.500000 | \n",
" 10.500000 | \n",
" 27.500000 | \n",
"
\n",
" \n",
" iqr | \n",
" 2.750000 | \n",
" 2.750000 | \n",
" 13.750000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Data Data+5 Data*5\n",
"mean 6.166667 11.166667 30.833333\n",
"std 3.128559 3.128559 15.642793\n",
"median 5.500000 10.500000 27.500000\n",
"iqr 2.750000 2.750000 13.750000"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show only the four summary rows appended at the bottom of df\n",
"df.tail(4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}